# Dickstein, Ho, and Mark (2023)
# Create summary statistics table showing plans within constructed plan are similar

# Preliminaries
setwd("../../../../library")
source("PreliminariesCode.R")

# Loading Data
## SERFF
SERFFDat <- fread("/homedir/tempwrkr/nmark/sharedWork/oregon/CreatedDatasets_NDM/SERFF40_Data.txt")
SERFFDat[SERFFDat == ""] <- NA

## Payer Name Crosswalk
Simple.Payer.CC <- data.frame(
  PAYER_ID = c("M0006", "M0006", "M0013", "M0019", "M0035", "M0037", "M0062", "M0077", "M0080", "M0099", "M0099"),
  Company_Name = c("UnitedHealthcare Insurance Company", "United Healthcare of Oregon, Inc", "PacificSource Health Plans", "Providence Health Plan", "Moda Health Plan, Inc.", "Health Net Health Plan of Oregon, Inc", "LifeWise Health Plan of Oregon", "Kaiser Foundation Health Plan of the Northwest", "Regence BlueCross BlueShield of Oregon", "Oregon's Health CO-OP", "Community Care of Oregon, Inc.")
)

# Weights To Use in Constructing the Plans: 
Weights <- fread("/homedir/tempwrkr/nmark/sharedWork/oregon/CreatedDatasets_NDM/ConstructedPlanWeights.csv")

## Creating a few variables to match what we are used to:
SERFFDat$metal <- ifelse(SERFFDat$metal_tier == "Bronze", 2,
  ifelse(SERFFDat$metal_tier == "ExpandedBronze", 2,
    ifelse(SERFFDat$metal_tier == "Silver", 3,
      ifelse(SERFFDat$metal_tier == "Gold", 4,
        ifelse(SERFFDat$metal_tier == "Platinum", 5, NA)))))

SERFFDat$Market.Type[SERFFDat$Market.Type == "Small Group"] <- "SmallGroup"

SERFFDat <- merge(SERFFDat, Simple.Payer.CC, by = "Company_Name")

SERFFDat$MNC <- ifelse(substr(SERFFDat$plantype, 1, 3) == "PPO" | substr(SERFFDat$plantype, 1, 3) == "POS", "NOT", NA)
SERFFDat$MNC <- ifelse(substr(SERFFDat$plantype, 1, 3) == "EPO" | substr(SERFFDat$plantype, 1, 3) == "HMO", "MNC", SERFFDat$MNC)

SERFFDat[, constrplanid := paste0(substr(Market.Type, 1, 2),
  substr(Exchange, 1, 2),
  ratingarea,
  substr(PAYER_ID, 4, 5),
  metal,
  substr(MNC, 1, 3))]

## Creating a Variable for the second cheapest silver plan:
SecondCheapestSilver <- SERFFDat[metal_tier == "Silver" & Market.Type == "Individual" & Exchange == "On Exchange", ]
SecondCheapestSilver <- SecondCheapestSilver[, 
  SecondCheapestPrem := min(Individual_Rate[-which.min(Individual_Rate)]), by = c("ratingarea", "year")]
SecondCheapestSilver <- SecondCheapestSilver[Individual_Rate == SecondCheapestPrem]

SERFFDat <- SERFFDat[CostShareType == "Standard"]

## Creating the best_guess values of things that we care about (in-network deductible, oopmax, copay, coins): 
TranslateFamilyDeductible <- function(x, i){
  R <- as.numeric(gsub("[a-zA-Z $]", "", unlist(lapply(strsplit(x, split = "|", fixed = T), function(x) x[i]))))
  return(R)
}

SERFFDat$best_guess_deductintegrated <-  ifelse(SERFFDat$med_drug_deduct_integrated == "Yes", 1, 0)
SERFFDat$med_drug_deduct_in_net_fam_ind <- TranslateFamilyDeductible(SERFFDat$med_drug_deduct_in_net_fam, 1)
SERFFDat$med_drug_deduct_in_net_fam_grp <- TranslateFamilyDeductible(SERFFDat$med_drug_deduct_in_net_fam, 2)
SERFFDat$med_deduct_in_net_fam_ind <- TranslateFamilyDeductible(SERFFDat$med_deduct_in_net_fam, 1)
SERFFDat$med_deduct_in_net_fam_grp <- TranslateFamilyDeductible(SERFFDat$med_deduct_in_net_fam, 2)
SERFFDat$drug_deduct_in_net_fam_ind <- TranslateFamilyDeductible(SERFFDat$drug_deduct_in_net_fam, 1)
SERFFDat$drug_deduct_in_net_fam_grp <- TranslateFamilyDeductible(SERFFDat$drug_deduct_in_net_fam, 2)
SERFFDat$best_guess_deductible_ind <- as.numeric(ifelse(SERFFDat$med_drug_deduct_integrated == "Yes", 
  SERFFDat$med_drug_deduct_in_net_fam_ind,
  SERFFDat$med_deduct_in_net_fam_ind + SERFFDat$drug_deduct_in_net_fam_ind)) 
SERFFDat$best_guess_deductible_grp <- as.numeric(ifelse(SERFFDat$med_drug_deduct_integrated == "Yes", 
  SERFFDat$med_drug_deduct_in_net_fam_grp,
  SERFFDat$med_deduct_in_net_fam_grp + SERFFDat$drug_deduct_in_net_fam_grp)) 
SERFFDat$best_guess_pcp_copay <- gsub("[a-zA-Z $]", "", SERFFDat$pcp_injure_ill_copay_tier1)
SERFFDat$best_guess_pcp_coinsure <- gsub("[a-zA-Z %]", "", SERFFDat$pcp_injure_ill_coinsure_tier1)
SERFFDat$best_guess_specialist_copay <- gsub("[a-zA-Z $]", "", SERFFDat$specialist_copay_tier1)
SERFFDat$best_guess_specialist_coinsure <- gsub("[a-zA-Z %]", "", SERFFDat$specialist_coinsure_tier1 )

# Cleaning 
SERFFDat[SERFFDat == ""] <- NA

## Assigning the weights to HIOS plans within a constructed plan:
SERFFDat[, premiumrank := rank(Individual_Rate), by = c("constrplanid", "year")]
SERFFDat[, size := .N, by = c("constrplanid", "year")]
SERFFDat <- merge(SERFFDat, Weights, by = c("Plan_ID", "constrplanid", "year"), all.x = T)

## Collapsing to the Constructed Plan Data:
ConstructedPlanDat <- SERFFDat[,c(
  list(Size = .N, 
    minimumprem = min(Individual_Rate, na.rm = TRUE),
    medianprem = median(Individual_Rate, na.rm = TRUE),
    maximumprem = max(Individual_Rate, na.rm = TRUE),
    n_networks = length(unique(networkid)),
    n_servicearea = length(unique(serviceareaid)),
    n_designtype = length(unique(designtype)),
    n_ppo = sum(substr(plantype, 1, 3) == "PPO"),
    n_epo = sum(substr(plantype, 1, 3) == "EPO"),
    p_nationalnetwork = mean(nationalnetwork == "Yes", na.rm = T),
    p_hsaeligible = mean(hsaeligible == "Yes", na.rm = T)),
    lapply(.SD, function(x) mean(x, na.rm = TRUE))), 
  by = c("ratingarea", "metal", "MNC", "year", "PAYER_ID", "Market.Type", "Exchange", "constrplanid"), 
  .SDcols = c(
    "Individual_Rate",
    "best_guess_deductintegrated",
    "best_guess_deductible_grp",
    "best_guess_deductible_ind",
    "best_guess_pcp_copay",
    "best_guess_pcp_coinsure",
    "best_guess_specialist_copay",
    "best_guess_specialist_coinsure")]

ConstructedPlanDat[, 
  market := ifelse(Market.Type == "Individual" & Exchange == "On Exchange", 
    1,
    ifelse(Market.Type == "Individual" & Exchange == "Off Exchange",
      2, 
      ifelse(Market.Type == "SmallGroup" & Exchange == "On Exchange",
        3, 
        ifelse(Market.Type == "SmallGroup" & Exchange == "Off Exchange",
          4, NA)))), ]

ConstructedPlanDat <- ConstructedPlanDat[!is.na(metal) & !is.na(MNC) & !is.na(PAYER_ID) & !is.na(ratingarea), ]

# Creating table of summary statistics
# only keep the relevant variables
ConstructedPlanDat2 <- ConstructedPlanDat[, .(
  plans = Size,
  networks = n_networks,
  service_areas = n_servicearea,
  plan_types = ifelse(n_ppo/Size == 0 | n_ppo/Size == 1, 1, 2),
  national_network_statuses = ifelse(p_nationalnetwork == 0 | p_nationalnetwork == 1, 1, 2),
  hsa_eligible_statuses = ifelse(p_hsaeligible == 0 | p_hsaeligible == 1, 1, 2),
  market = Market.Type
),]

library(vtable)
var.labs <- data.frame(
  var = c(
    'plans','networks', 'service_areas','plan_types', 'national_network_statuses', 'hsa_eligible_statuses'),
  labels = c('Plans','Networks', 'Service Areas','Plan Types', 'National Network Statuses', 'HSA Eligible Statuses'))

summstats <- st(ConstructedPlanDat2, 
  labels = var.labs,
  summ = c('min(x)', 'pctile(x)[25]', 'median(x)', 'pctile(x)[75]', 'max(x)', 'mean(x)'),
  summ.names = c("Min", "25%", "50%", "75%", "Max", "Mean"),
  group = 'market', group.long=TRUE, digits = 1, out="csv")
fwrite(summstats, paste0(project_folder, "/analysis/tablesandfigures/release/misc/ConstructedPlan_summstats.csv"))

summstats_num_obs <- st(ConstructedPlanDat2, 
  labels = var.labs,
  summ = c('notNA(x)', 'min(x)', 'pctile(x)[25]', 'median(x)', 'pctile(x)[75]', 'max(x)', 'mean(x)'),
  summ.names = c("N", "Min", "25%", "50%", "75%", "Max", "Mean"),
  group = 'market', group.long=TRUE, digits=1, out="csv")
fwrite(summstats_num_obs, paste0(project_folder, "/analysis/tablesandfigures/release/misc/ConstructedPlan_summstats_num_obs.csv"))
